library(recount)
## study SRP039909
url <- download_study('SRP039909')
2022-06-28 15:52:43 downloading file rse_gene.Rdata to SRP039909
trying URL 'http://duffel.rail.bio/recount/v2/SRP039909/rse_gene.Rdata'
Content type 'application/octet-stream' length 4118527 bytes (3.9 MB)
==================================================
downloaded 3.9 MB
## View the url for the file by printing the object url
url
[1] "http://duffel.rail.bio/recount/v2/SRP039909/rse_gene.Rdata"
## Load the data
load(file.path('SRP039909', 'rse_gene.Rdata'))
metadata <- data.frame(run=rse_gene$run, sample=rse_gene$title)
metadata$receptor <- sapply(strsplit(as.character(metadata$sample), ","), `[`, 1)
metadata$treatment <- sapply(strsplit(as.character(metadata$sample), ","), `[`, 2)
metadata$replicate <- sapply(strsplit(as.character(metadata$sample), ","), `[`, 3)
res
log2 fold change (MLE): treatment .Vehicle vs .10.nM.E2
Wald test p-value: treatment .Vehicle vs .10.nM.E2
DataFrame with 28973 rows and 6 columns
baseMean log2FoldChange lfcSE stat
<numeric> <numeric> <numeric> <numeric>
ENSG00000000003.14 710.310163292732 0.00375189646050679 0.0699411120884318 0.0536436488994197
ENSG00000000419.12 1277.70335663972 -0.0396293714479133 0.0606350594514635 -0.653571907184083
ENSG00000000457.13 534.454337864544 0.190891382801625 0.0807561221343446 2.36380075908128
ENSG00000000460.16 1139.94912011033 0.0893914355353174 0.0604567942878786 1.47860032256523
ENSG00000000938.12 5.17432884984207 -0.477508836852796 0.752274043410998 -0.634753838757551
... ... ... ... ...
ENSG00000283684.1 11.6374015607531 0.0760961645679108 0.489025578985948 0.155607738813387
ENSG00000283689.1 7.13658724803512 -0.0739759441575268 0.638815102664648 -0.11580180845593
ENSG00000283691.1 2.15928810443265 0.217424645292482 1.13422956243922 0.191693685734041
ENSG00000283696.1 2.31846960335564 -0.00333504805844505 1.10069587749941 -0.00302994507985413
ENSG00000283697.1 19.0952799785119 0.32170164551859 0.392419171112049 0.819790849175237
pvalue padj
<numeric> <numeric>
ENSG00000000003.14 0.957219079763501 0.999177795065183
ENSG00000000419.12 0.513387648276445 0.999177795065183
ENSG00000000457.13 0.0180885376924355 0.137339986283105
ENSG00000000460.16 0.139247166470467 0.578051470479517
ENSG00000000938.12 0.52558897393342 NA
... ... ...
ENSG00000283684.1 0.876342224564322 0.999177795065183
ENSG00000283689.1 0.907809617219465 0.999177795065183
ENSG00000283691.1 0.847982152319513 NA
ENSG00000283696.1 0.997582457299773 NA
ENSG00000283697.1 0.412335348574565 0.950365206955305
library(DT)
res_table <- as.data.frame(res)
DT::datatable(res_table)
It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.htmlIt seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html
vsd <- vst(dds, blind=FALSE)
plotPCA(vsd, intgroup="treatment")
res_table$logpval <- -log10(res_table$padj)
library(ggplot2)
ggplot(res_table, aes(x=res$log2FoldChange, y=logpval))+ geom_point()
library("pheatmap")
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:20]
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
cluster_cols=FALSE)
5. Do GSEA on the results and plot the top 5 pathways. For your ranking metric, use the stat column from your DESeq2 results.Top is defined by pathways with the lowest adjusted p-values.
gse <- gseGO(geneList=gene_list,
ont ="BP",
keyType = "ENSEMBL",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = organism,
pAdjustMethod = "none")
preparing geneSet collections...
GSEA analysis...
There are ties in the preranked stats (3.56% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.leading edge analysis...
done...
require(DOSE)
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
wrong orderBy parameter; set to default `orderBy = "x"`
6. Make sure to export your R Markdown to HTML and that it comes out looking correctly – example of an HTML export of RMarkdown: https://static-html-pages.s3-us-west-2.amazonaws.com/merck-project/RloopCorrelationSummary.html. Also include a floating TOC, and foldable code snippets that are folded by default.
7. What do the results tell you about your conditions of interest? What is the biological meaning of these results? What future experiments could you perform? (These questions don’t have an exact right answer, just about thinking through the meaning of the results).